# Attach the pipe operator before the first statement that uses it.
# NOTE(review): separate() and mutate() below are tidyr/dplyr verbs that this
# chunk does not attach itself -- confirm both packages are loaded earlier.
library("magrittr")

# Read the raw tree inventory; the path is relative to the working directory.
trees <- read.csv("./Burlington_Trees.csv")

# Split the Geo.Point column at the comma into latitude and longitude, then
# convert both pieces from text to numeric.
trees <- trees %>%
  separate(Geo.Point, c("lat", "long"), sep = ",") %>%
  mutate(
    lat = as.numeric(lat),
    long = as.numeric(long)
  )

# Split the species column at the comma into genus and species.
# Rows without a comma only supply the first piece; `fill = "right"` states
# explicitly that the missing species should be NA. With the default
# (`fill = "warn"`) the result is the same, but tidyr emits
# "Expected 2 pieces. Missing pieces filled with `NA` in 4170 rows".
trees <- trees %>%
  separate(species, c("genus", "species"), sep = ",", fill = "right")
# Tidy up the genus labels.
# 1. Drop the " spp" text that was included as a placeholder; it is a literal
#    string, so it is matched with fixed = TRUE rather than as a regex.
trees$genus <- gsub(" spp", "", trees$genus, fixed = TRUE)
# 2. Correct the misspelling "mapl" -> "maple" in a single pass. The negative
#    lookahead (?!e) skips any "mapl" that is already followed by "e", so a
#    correctly spelled "maple" is never turned into "maplee" and no second
#    clean-up substitution is required.
trees$genus <- gsub("mapl(?!e)", "maple", trees$genus, perl = TRUE)
# Recode zeros and empty strings as missing values.
# In this data set a zero means the information was not recorded, not that
# the true value is zero, so it becomes NA and can be filtered out before
# graphing. Blank cells get the same treatment.
# Both comparisons are evaluated over every column of the data frame.
trees <- replace(trees, trees == 0, NA)
trees <- replace(trees, trees == "", NA)
# Re-express the `modified` column as a year-month value, parsing the
# existing entries with the "%m/%Y" format string.
# NOTE(review): as.yearmon() is presumably zoo's; zoo is not attached in this
# chunk -- confirm it is loaded earlier.
trees <- mutate(trees, modified = as.yearmon(modified, "%m/%Y"))

# Preview the first rows of the cleaned data frame.
head(trees)
NA
```r
# number of trees by species
# histogram
```
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
## Relationship between species abundance and Land Use
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyBsYW5kdXNlIHYgc3BlY2llc1xuIyBtdWx0aXBsZSBiYXIgY2hhcnRcbiMgc2VlIGlmIGNlcnRhaW4gdHlwZXMgb2YgdHJlZXMgYXJlIG1vcmUgY29tbW9uIGJ5IGJ1c2luZXNzLCByZXNpZGVudGlhbCwgZXRjLlxuXG4jIGZvcm1hdCB0aGUgZGF0YSBpbnRvIGEgbmV3IGRhdGEgZnJhbVxubGFuZHVzZV9ieV9nZW51cyA8LSB4dGFicyhmb3JtdWxhID0gfiBsYW5kdXNlICsgZ2VudXMsXG4gICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmVlcykgJT4lIFxuICBwcm9wLnRhYmxlKG1hcmdpbiA9IFwibGFuZHVzZVwiKSAlPiUgICMgY2FsY3VsYXRlcyBwcm9wb3J0aW9cbiAgXG4gICMgcGlwZSBpbnRvIGRhdGEuZnJhbWVcbiAgIyBVc2luZyBkYXRhLmZyYW1lKCkgdHVybnMgaXQgZnJvbSBhIHRhYmxlIGludG8gYSBkYXRhIGZyYW1lXG4gIGRhdGEuZnJhbWUoKSAlPiUgXG4gIGZpbHRlcihGcmVxID4gLjEpIFxuXG5sYW5kdXNlX2J5X2dlbnVzXG5gYGAifQ== -->
```r
# Land use vs. genus, prepared for a multiple bar chart: are certain kinds of
# trees more common on business, residential, etc. land?

# Step 1: cross-tabulate tree counts by land use and genus.
landuse_by_genus <- xtabs(~ landuse + genus, data = trees)

# Step 2: turn the counts into proportions within each land-use type.
landuse_by_genus <- prop.table(landuse_by_genus, margin = "landuse")

# Step 3: data.frame() converts the table into a data frame with one row per
# landuse/genus combination and the proportion in the `Freq` column.
landuse_by_genus <- data.frame(landuse_by_genus)

# Step 4: keep only the combinations whose proportion exceeds 0.1.
landuse_by_genus <- filter(landuse_by_genus, Freq > 0.1)

landuse_by_genus
# Bar chart with one bar per land-use type, segmented by genus.
landuse_species_bar <- ggplot(
  landuse_by_genus,
  aes(x = landuse, y = Freq, fill = genus)
) +
  # geom_col() uses the supplied y values as bar heights instead of counting
  # rows (it is geom_bar() with stat = "identity").
  # position = "fill" stacks the segments and rescales every bar to the same
  # total height.
  geom_col(color = "black", position = "fill") +
  labs(
    title = "Top Genus per Land Use Type",
    x = "Land Use Type",
    y = "Proportion"
  )

landuse_species_bar
```

```r
# diameter v appraisal
# scatterplot probably
```